import pandas as pd
import numpy as np
import plotly.express as px
base_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/'
confirmed_df = pd.read_csv(base_url + 'time_series_covid19_confirmed_global.csv')
confirmed_df
| Province/State | Country/Region | Lat | Long | 1/22/20 | 1/23/20 | 1/24/20 | 1/25/20 | 1/26/20 | 1/27/20 | ... | 2/21/22 | 2/22/22 | 2/23/22 | 2/24/22 | 2/25/22 | 2/26/22 | 2/27/22 | 2/28/22 | 3/1/22 | 3/2/22 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | NaN | Afghanistan | 33.939110 | 67.709953 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 172441 | 172716 | 172901 | 173047 | 173084 | 173146 | 173395 | 173659 | 173879 | 174073 |
| 1 | NaN | Albania | 41.153300 | 20.168300 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 270370 | 270455 | 270734 | 270947 | 271141 | 271141 | 271527 | 271563 | 271702 | 271825 |
| 2 | NaN | Algeria | 28.033900 | 1.659600 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 264201 | 264365 | 264488 | 264603 | 264706 | 264778 | 264855 | 264936 | 265010 | 265079 |
| 3 | NaN | Andorra | 42.506300 | 1.521800 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 37589 | 37820 | 37901 | 37958 | 37999 | 37999 | 37999 | 37999 | 38165 | 38249 |
| 4 | NaN | Angola | -11.202700 | 17.873900 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 98658 | 98671 | 98698 | 98701 | 98701 | 98701 | 98701 | 98741 | 98746 | 98746 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 279 | NaN | West Bank and Gaza | 31.952200 | 35.233200 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 633684 | 636055 | 638172 | 640214 | 640214 | 640214 | 640214 | 645947 | 647203 | 648039 |
| 280 | NaN | Winter Olympics 2022 | 39.904200 | 116.407400 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 509 | 509 | 510 | 510 | 514 | 514 | 518 | 521 | 524 | 524 |
| 281 | NaN | Yemen | 15.552727 | 48.516388 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 11741 | 11746 | 11751 | 11751 | 11759 | 11760 | 11769 | 11771 | 11771 | 11771 |
| 282 | NaN | Zambia | -13.133897 | 27.849332 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 311264 | 311592 | 311888 | 312118 | 312374 | 312611 | 312707 | 312750 | 312970 | 313203 |
| 283 | NaN | Zimbabwe | -19.015438 | 29.154857 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 233571 | 233980 | 234589 | 234967 | 235467 | 235803 | 235803 | 236380 | 236871 | 237503 |
284 rows × 775 columns
#removing Province/State, Lat, Long column
confirmed_df = confirmed_df.drop(columns = ['Lat','Long', 'Province/State'])
#checking data frame
confirmed_df.head()
| Country/Region | 1/22/20 | 1/23/20 | 1/24/20 | 1/25/20 | 1/26/20 | 1/27/20 | 1/28/20 | 1/29/20 | 1/30/20 | ... | 2/21/22 | 2/22/22 | 2/23/22 | 2/24/22 | 2/25/22 | 2/26/22 | 2/27/22 | 2/28/22 | 3/1/22 | 3/2/22 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 172441 | 172716 | 172901 | 173047 | 173084 | 173146 | 173395 | 173659 | 173879 | 174073 |
| 1 | Albania | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 270370 | 270455 | 270734 | 270947 | 271141 | 271141 | 271527 | 271563 | 271702 | 271825 |
| 2 | Algeria | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 264201 | 264365 | 264488 | 264603 | 264706 | 264778 | 264855 | 264936 | 265010 | 265079 |
| 3 | Andorra | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 37589 | 37820 | 37901 | 37958 | 37999 | 37999 | 37999 | 37999 | 38165 | 38249 |
| 4 | Angola | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 98658 | 98671 | 98698 | 98701 | 98701 | 98701 | 98701 | 98741 | 98746 | 98746 |
5 rows × 772 columns
#merging rows for same country/regions
confirmed_df = confirmed_df.groupby(by='Country/Region').aggregate(np.sum).T
#checking data frame
confirmed_df
| Country/Region | Afghanistan | Albania | Algeria | Andorra | Angola | Antarctica | Antigua and Barbuda | Argentina | Armenia | Australia | ... | Uruguay | Uzbekistan | Vanuatu | Venezuela | Vietnam | West Bank and Gaza | Winter Olympics 2022 | Yemen | Zambia | Zimbabwe |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1/22/20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1/23/20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 |
| 1/24/20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 |
| 1/25/20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 |
| 1/26/20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | ... | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2/26/22 | 173146 | 271141 | 264778 | 37999 | 98701 | 11 | 7437 | 8893568 | 419423 | 3188117 | ... | 838341 | 236174 | 19 | 514315 | 3219177 | 640214 | 514 | 11760 | 312611 | 235803 |
| 2/27/22 | 173395 | 271527 | 264855 | 37999 | 98701 | 11 | 7437 | 8897178 | 419693 | 3209599 | ... | 840511 | 236299 | 19 | 514315 | 3321005 | 640214 | 518 | 11769 | 312707 | 235803 |
| 2/28/22 | 173659 | 271563 | 264936 | 37999 | 98741 | 11 | 7437 | 8900656 | 419832 | 3235017 | ... | 842462 | 236405 | 19 | 515124 | 3443485 | 645947 | 521 | 11771 | 312750 | 236380 |
| 3/1/22 | 173879 | 271702 | 265010 | 38165 | 98746 | 11 | 7447 | 8904176 | 420156 | 3256772 | ... | 844400 | 236501 | 19 | 515124 | 3557629 | 647203 | 524 | 11771 | 312970 | 236871 |
| 3/2/22 | 174073 | 271825 | 265079 | 38249 | 98746 | 11 | 7449 | 8912317 | 420498 | 3296784 | ... | 846868 | 236596 | 19 | 515582 | 3709481 | 648039 | 524 | 11771 | 313203 | 237503 |
771 rows × 198 columns
#changing column name (Country/Region to Date)
confirmed_df.index.name = 'Date'
#reset index
confirmed_df = confirmed_df.reset_index()
confirmed_df.tail()
| Country/Region | Date | Afghanistan | Albania | Algeria | Andorra | Angola | Antarctica | Antigua and Barbuda | Argentina | Armenia | ... | Uruguay | Uzbekistan | Vanuatu | Venezuela | Vietnam | West Bank and Gaza | Winter Olympics 2022 | Yemen | Zambia | Zimbabwe |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 766 | 2/26/22 | 173146 | 271141 | 264778 | 37999 | 98701 | 11 | 7437 | 8893568 | 419423 | ... | 838341 | 236174 | 19 | 514315 | 3219177 | 640214 | 514 | 11760 | 312611 | 235803 |
| 767 | 2/27/22 | 173395 | 271527 | 264855 | 37999 | 98701 | 11 | 7437 | 8897178 | 419693 | ... | 840511 | 236299 | 19 | 514315 | 3321005 | 640214 | 518 | 11769 | 312707 | 235803 |
| 768 | 2/28/22 | 173659 | 271563 | 264936 | 37999 | 98741 | 11 | 7437 | 8900656 | 419832 | ... | 842462 | 236405 | 19 | 515124 | 3443485 | 645947 | 521 | 11771 | 312750 | 236380 |
| 769 | 3/1/22 | 173879 | 271702 | 265010 | 38165 | 98746 | 11 | 7447 | 8904176 | 420156 | ... | 844400 | 236501 | 19 | 515124 | 3557629 | 647203 | 524 | 11771 | 312970 | 236871 |
| 770 | 3/2/22 | 174073 | 271825 | 265079 | 38249 | 98746 | 11 | 7449 | 8912317 | 420498 | ... | 846868 | 236596 | 19 | 515582 | 3709481 | 648039 | 524 | 11771 | 313203 | 237503 |
5 rows × 199 columns
#melt dataframe
confirmed_melt_df = confirmed_df.melt(id_vars='Date').copy()
#checking melting
confirmed_melt_df
| Date | Country/Region | value | |
|---|---|---|---|
| 0 | 1/22/20 | Afghanistan | 0 |
| 1 | 1/23/20 | Afghanistan | 0 |
| 2 | 1/24/20 | Afghanistan | 0 |
| 3 | 1/25/20 | Afghanistan | 0 |
| 4 | 1/26/20 | Afghanistan | 0 |
| ... | ... | ... | ... |
| 152653 | 2/26/22 | Zimbabwe | 235803 |
| 152654 | 2/27/22 | Zimbabwe | 235803 |
| 152655 | 2/28/22 | Zimbabwe | 236380 |
| 152656 | 3/1/22 | Zimbabwe | 236871 |
| 152657 | 3/2/22 | Zimbabwe | 237503 |
152658 rows × 3 columns
#rename column
confirmed_melt_df.rename(columns={'value':'Confirmed'},inplace=True)
confirmed_melt_df.head()
| Date | Country/Region | Confirmed | |
|---|---|---|---|
| 0 | 1/22/20 | Afghanistan | 0 |
| 1 | 1/23/20 | Afghanistan | 0 |
| 2 | 1/24/20 | Afghanistan | 0 |
| 3 | 1/25/20 | Afghanistan | 0 |
| 4 | 1/26/20 | Afghanistan | 0 |
#converting Date(strings) to actual day/time format
max_date = confirmed_melt_df['Date'].max()
max_date
'9/9/21'
confirmed_melt_df['Date'] = pd.to_datetime(confirmed_melt_df['Date'])
confirmed_melt_df.head()
| Date | Country/Region | Confirmed | |
|---|---|---|---|
| 0 | 2020-01-22 | Afghanistan | 0 |
| 1 | 2020-01-23 | Afghanistan | 0 |
| 2 | 2020-01-24 | Afghanistan | 0 |
| 3 | 2020-01-25 | Afghanistan | 0 |
| 4 | 2020-01-26 | Afghanistan | 0 |
confirmed_melt_df['Date'] = confirmed_melt_df['Date'].dt.strftime('%m/%d/%y')
max_date = confirmed_melt_df['Date'].max()
max_date
'12/31/21'
#vizualization of data
#maximum date confirmed cases number
total_confirmed_df = confirmed_melt_df[confirmed_melt_df['Date']==max_date]
total_confirmed_df
| Date | Country/Region | Confirmed | |
|---|---|---|---|
| 709 | 12/31/21 | Afghanistan | 158084 |
| 1480 | 12/31/21 | Albania | 210224 |
| 2251 | 12/31/21 | Algeria | 218432 |
| 3022 | 12/31/21 | Andorra | 23740 |
| 3793 | 12/31/21 | Angola | 81593 |
| ... | ... | ... | ... |
| 149512 | 12/31/21 | West Bank and Gaza | 469748 |
| 150283 | 12/31/21 | Winter Olympics 2022 | 0 |
| 151054 | 12/31/21 | Yemen | 10126 |
| 151825 | 12/31/21 | Zambia | 254274 |
| 152596 | 12/31/21 | Zimbabwe | 213258 |
198 rows × 3 columns
# sum of total confirmed cases
total_confirmed = total_confirmed_df['Confirmed'].sum()
total_confirmed
288666476
#total cases per country
fig = px.bar(total_confirmed_df, x='Country/Region', y='Confirmed')
fig.show()
# showing top 30 confirmed countries
fig = px.bar(total_confirmed_df.sort_values('Confirmed',ascending=False).head(30), x='Country/Region', y='Confirmed')
fig.show()
#historical trend of countries
figTwo = px.scatter(confirmed_melt_df,x='Date', y='Confirmed',color='Country/Region')
figTwo.show()
#specific country affection rate
figThree = px.line(confirmed_melt_df[confirmed_melt_df['Country/Region']=='Bangladesh'],x='Date', y='Confirmed')
figThree.show()